library("igraph")
library("plotly")
# Load the full e-mail edge list (header row, whitespace-separated fields).
edges <- read.csv("emails.edges", header = TRUE, sep = "")
# Copy of the edge list flagging endpoints whose id is below 144, the same
# cut-off the notebook uses for its isGestor flag.
edges_with_relation <- edges
edges_with_relation$fromGestor <- edges_with_relation$from < 144
edges_with_relation$toGestor <- edges_with_relation$to < 144
# Directed graph with loops removed and multi-edges merged (attributes summed).
# simplify() is namespace-qualified because another attached package can mask
# it, which fails with "unused argument (edge.attr.comb = ...)".
g <- graph_from_data_frame(edges, directed = TRUE)
g <- igraph::simplify(g, edge.attr.comb = "sum")
d <- degree(as.undirected(g)) # node degrees, ignoring edge direction
max(d) # maximum degree
min(d) # minimum degree
d[which.max(d)] # node that has the maximum degree
# Build a table with one row per node: numeric id and total degree.
# Ids are converted straight from the vertex names carried by `d`, so no
# intermediate character/factor column is involved.
nodes_df <- data.frame(
  id = as.numeric(names(d)),
  d = unname(d)
)
cat("\n Número de empleados\n", length(unique(nodes_df[["id"]])))

 Número de empleados
 87273
# In/out degree per node. The vectors are taken directly from degree();
# `as.data.frame(...)$d` only worked through partial matching of the
# auto-generated column name.
nodes_df$d_in <- unname(degree(g, mode = "in"))
nodes_df$d_out <- unname(degree(g, mode = "out"))
# 1 for ids below 144 (plotted as "Managers" later on), 0 otherwise.
nodes_df$isGestor <- ifelse(nodes_df$id < 144, 1, 0)

# Sort by id. From here on the rows no longer follow the vertex order of g.
nodes_df <- nodes_df %>%
  arrange(id)
# Box plot of the total degree over all nodes.
fig <- plot_ly(x = nodes_df$d, type = "box", name = "") %>%
  layout(
    title = 'Distribución de los grados de los nodos',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig

# Split the node table by the isGestor flag (rows with NA are dropped).
nodes_manager_df <- nodes_df[which(nodes_df$isGestor == 1), ]
nodes_regular_employee_df <- nodes_df[which(nodes_df$isGestor == 0), ]

# Histogram of total degree for the isGestor == 1 rows.
fig <- plot_ly(
  x = nodes_manager_df$d,
  type = "histogram",
  histfunc = 'sum',
  name = "Managers",
  nbinsx = 25
) %>%
  layout(
    title = 'Distribución de los grados de managers',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig

# Histogram of total degree for the isGestor == 0 rows.
fig <- plot_ly(
  x = nodes_regular_employee_df$d,
  type = "histogram",
  histfunc = 'sum',
  name = "Regular employees",
  nbinsx = 25
) %>%
  layout(
    title = 'Distribución de los grados de empleados regulares',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
# Mean and maximum total degree, managers vs. regular employees.
# (A stray bare `degree` expression used to follow; it only printed the source
# of the igraph function and has been dropped.)
cat("Grado medio de los nodos managers: \t", mean(nodes_manager_df$d))
cat("\nGrado medio de los nodos regulares: \t", mean(nodes_regular_employee_df$d))

cat("\nGrado máximo de los nodos managers: \t", max(nodes_manager_df$d))
cat("\nGrado máximo de los nodos regulares: \t", max(nodes_regular_employee_df$d))
# Ten rows of nodes_df with the highest total degree, drawn as a bar chart.
top_degrees <- head(arrange(nodes_df, desc(d)), 10)
fig <- plot_ly(
  type = "bar",
  x = factor(top_degrees$id),
  y = top_degrees$d,
  text = top_degrees$d,    # value label for each bar
  textposition = "inside", # draw the label inside the bar
  name = "Top 10 nodos con mayor grado"
)
fig

Distribución de grados entrantes y salientes

# Ten nodes with the highest in-degree.
top_degrees_in <- nodes_df %>%
  arrange(desc(d_in)) %>%
  head(10)

fig <- plot_ly(
  x = as.factor(top_degrees_in$id),
  y = top_degrees_in$d_in,
  name = "Top 10 nodos con mayor grado entrante",
  text = top_degrees_in$d_in, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig
# Ten nodes with the highest out-degree. Kept in its own variable so the
# in-degree ranking above is not overwritten under a misleading name.
top_degrees_out <- nodes_df %>%
  arrange(desc(d_out)) %>%
  head(10)

fig <- plot_ly(
  x = as.factor(top_degrees_out$id),
  y = top_degrees_out$d_out,
  name = "Top 10 nodos con mayor grado saliente",
  text = top_degrees_out$d_out, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig
# In-degree vs. out-degree histograms for the manager rows, as grouped bars.
fig <- plot_ly(
  x = nodes_manager_df$d_in,
  type = "histogram",
  histfunc = 'sum',
  name = "Grados entrantes",
  nbinsx = 25
) %>%
  add_trace(
    x = nodes_manager_df$d_out,
    type = "histogram",
    histfunc = 'sum',
    name = "Grados salientes",
    nbinsx = 25
  ) %>%
  layout(
    title = 'Distribución de los grados entrantes/salientes de gestores',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
# Same comparison for the regular-employee rows.
fig <- plot_ly(
  x = nodes_regular_employee_df$d_in,
  type = "histogram",
  histfunc = 'sum',
  name = "Grados entrantes",
  nbinsx = 25
) %>%
  add_trace(
    x = nodes_regular_employee_df$d_out,
    type = "histogram",
    histfunc = 'sum',
    name = "Grados salientes",
    nbinsx = 25
  ) %>%
  layout(
    title = 'Distribución de los grados entrantes/salientes de empleados regulares',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig

Simplificación para medidas estructurales

Dado que no es computacionalmente factible calcular las medidas estructurales con el grafo de emails.edges original, hemos decidido simplificar el problema planteando un subgrafo colapsado para un periodo de tiempo más reducido a partir del fichero emails-timestamps.edges. En este caso se ha considerado el año 2001.

Medidas estructurales sobre el grafo reducido

# Load the reduced edge list (header row, whitespace-separated fields).
edges <- read.csv("emails-reduced-december-2000.edges", header = TRUE, sep = "")
# Copy of the edge list without the weight column, flagging endpoints whose id
# is below 144.
edges_with_relation <- edges
edges_with_relation$weight <- NULL
edges_with_relation$fromGestor <- edges_with_relation$from < 144
edges_with_relation$toGestor <- edges_with_relation$to < 144
# Directed graph with loops removed and multi-edges merged (attributes summed).
# simplify() is namespace-qualified because another attached package can mask
# it, which fails with "unused argument (edge.attr.comb = ...)".
g <- graph_from_data_frame(edges, directed = TRUE)
g <- igraph::simplify(g, edge.attr.comb = "sum")
Error in simplify(g, edge.attr.comb = "sum") : 
  unused argument (edge.attr.comb = "sum")
# Diameter of g, directed and undirected, ignoring edge weights. The argument
# is spelled `weights` in full instead of relying on partial matching.
diameter(g, directed = TRUE, weights = NA)
diameter(g, directed = FALSE, weights = NA)
#distances(as.undirected(g), weights = NA) # --> too large!
mean_distance(as.undirected(g))
#all_shortest_paths(as.undirected(g), from = 2, weights = NA) # --> too large!
# Graph components: is g weakly connected?
is_connected(g, mode = "weak")
[1] FALSE
# Is g strongly connected?
is_connected(g, mode = "strong")
[1] FALSE
# Strongly connected components of g, and how many of them there are.
componentes <- components(graph = g, mode = "strong")

length(componentes[["csize"]])
[1] 78058
# Relative frequency of component sizes.
cd <- component_distribution(g)

# Sub-graph induced by the largest weakly connected component. The weak
# components are computed once instead of twice.
weak_components <- components(g, mode = "weak")
cmax <- induced_subgraph(
  g,
  which(weak_components$membership == which.max(weak_components$csize))
)
# Articulation points of g.
articulation_points(g)
+ 4152/87273 vertices, named, from 2927be6:
   [1] 9186  13547 11710 11453 10800 1356  2909  19310 10584 20769 26948 1564  17964 18351 22536 21674 1296  11919 403  
  [20] 12452 12337 268   267   17865 960   11722 12591 5227  13552 13394 14017 11909 11910 14604 3082  3023  27161 27162
  [39] 52879 27971 3158  13545 8427  566   19978 3081  19206 17984 31362 66659 574   494   33039 37622 4254  36580 8362 
  [58] 19073 17222 1420  1454  5758  13802 11807 17427 17376 28250 66707 1012  2356  39524 39543 59698 80041 2301  20344
  [77] 41230 80926 19802 18715 18705 42727 6192  36456 30437 85745 33723 3702  71807 12051 28492 5573  35184 472   47929
  [96] 48015 49221 48952 48675 47996 7544  49025 48295 48484 48422 48508 48505 21469 26675 23751 21660 25122 21153 21052
 [115] 21176 21012 20846 21863 22199 26995 21933 21476 21448 21166 22796 21787 27005 27000 21116 24037 64327 64317 45662
 [134] 31666 6720  51266 46235 30122 30302 30216 30206 45701 37476 41631 41719 41615 41624 9914  40025 877   14491 38898
 [153] 7654  38706 38694 7630  7655  7615  7638  7632  30756 29313 28398 28703 46628 10214 63777 77597 77674 77621 5492 
 [172] 3528  59131 4363  74732 48833 73489 74838 33738 57548 57508 57512 57709 60527 80234 80241 45675 8293  80389 82086
+ ... omitted several vertices
# Size of the largest component stored in `componentes`.
max(componentes[["csize"]])
[1] 9164
# Manager flag as a vertex attribute. It is derived from each vertex's own name
# so it follows the vertex order of the current g; nodes_df is sorted by id and
# was built from the full graph, so its column cannot be assigned positionally.
V(g)$isGestor <- ifelse(as.numeric(V(g)$name) < 144, 1, 0)
# Structural measures
edge_density(as.undirected(g))
transitivity(g)
reciprocity(g)
assortativity_degree(g)
assortativity_nominal(g, as.factor(V(g)$isGestor))
clique_num(g)
# NOTE(review): cliques() enumerates every clique and the result is printed in
# full; confirm this is intended on a graph of this size.
cliques(g)

Medidas de centralidad

# Centrality measures on the current graph g.
b <- betweenness(g)
c <- closeness(g)
e <- eigen_centrality(g)$vector
pr <- page_rank(g, directed = FALSE, weights = NA)$vector
# The centrality vectors follow the vertex order of g, whereas nodes_df is
# sorted by id. Rows are matched by id; ids absent from g get NA.
vertex_pos <- match(nodes_df$id, as.numeric(V(g)$name))
nodes_df$b <- unname(b)[vertex_pos]
nodes_df$c <- unname(c)[vertex_pos]
nodes_df$e <- unname(e)[vertex_pos]
nodes_df$pr <- unname(pr)[vertex_pos]
# Top 5 nodes by betweenness. The trace name used to be the copy-pasted
# "Top 10 nodos con mayor grado"; each chart now names what it shows.
top_betweenness <- nodes_df %>%
  arrange(desc(b)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_betweenness$id),
  y = top_betweenness$b,
  name = "Top 5 nodos con mayor centralidad de intermediario",
  text = top_betweenness$b, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de intermediario', plot_bgcolor = "#e5ecf6")

fig
# Top 5 nodes by closeness.
top_closenness <- nodes_df %>%
  arrange(desc(c)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_closenness$id),
  y = top_closenness$c,
  name = "Top 5 nodos con mayor centralidad de cercanía",
  text = top_closenness$c, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de cercanía', plot_bgcolor = "#e5ecf6")

fig
# Top 5 nodes by eigenvector centrality.
top_eigenvector <- nodes_df %>%
  arrange(desc(e)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_eigenvector$id),
  y = top_eigenvector$e,
  name = "Top 5 nodos con mayor centralidad de autovector",
  text = top_eigenvector$e, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de autovector', plot_bgcolor = "#e5ecf6")

fig
# Top 5 nodes by PageRank.
top_pagerank <- nodes_df %>%
  arrange(desc(pr)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_pagerank$id),
  y = top_pagerank$pr,
  name = "Top 5 nodos con mayor centralidad de PageRank",
  text = top_pagerank$pr, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de PageRank', plot_bgcolor = "#e5ecf6")

fig

Comunidades

# Communities - Girvan-Newman (edge betweenness), on the undirected graph.
# NOTE(review): the captured warning below says modularity treats edge weights
# as similarities while edge betweenness treats them as distances; confirm that
# running this on the weighted graph is intended.
ebc <- cluster_edge_betweenness(as.undirected(g))
Warning: At core/community/edge_betweenness.c:493 : Membership vector will be selected based on the highest modularity score.Warning: At core/community/edge_betweenness.c:500 : Modularity calculation with weighted edge betweenness community detection might not make sense -- modularity treats edge weights as similarities while edge betwenness treats them as distances.
NA
# Undirected view of g, built once and shared by every community plot below.
g_undirected <- as.undirected(g)
plot(ebc, g_undirected)
plot_dendrogram(ebc)
# Communities - spectral partitioning (leading eigenvector)
lec <- cluster_leading_eigen(g_undirected)
plot(lec, g_undirected)
# Communities - Louvain
plot(cluster_louvain(g_undirected), g_undirected)
# Communities - Leiden
plot(cluster_leiden(g_undirected, objective_function = "modularity"), g_undirected)
---
title: "Emails analysis"
output: html_notebook
---

```{r}
library("igraph")
library("plotly")
```

```{r}
# Load the full e-mail edge list (header row, whitespace-separated fields).
edges <- read.csv("emails.edges", header = TRUE, sep = "")
# Copy of the edge list flagging endpoints whose id is below 144, the same
# cut-off the notebook uses for its isGestor flag.
edges_with_relation <- edges
edges_with_relation$fromGestor <- edges_with_relation$from < 144
edges_with_relation$toGestor <- edges_with_relation$to < 144
# Directed graph with loops removed and multi-edges merged (attributes summed).
# simplify() is namespace-qualified because another attached package can mask
# it, which fails with "unused argument (edge.attr.comb = ...)".
g <- graph_from_data_frame(edges, directed = TRUE)
g <- igraph::simplify(g, edge.attr.comb = "sum")
d <- degree(as.undirected(g)) # node degrees, ignoring edge direction
max(d) # maximum degree
min(d) # minimum degree
d[which.max(d)] # node that has the maximum degree
```

```{r}
# Build a table with one row per node: numeric id and total degree.
# Ids are converted straight from the vertex names carried by `d`, so no
# intermediate character/factor column is involved.
nodes_df <- data.frame(
  id = as.numeric(names(d)),
  d = unname(d)
)
cat("\n Número de empleados\n", length(unique(nodes_df[["id"]])))
# In/out degree per node. The vectors are taken directly from degree();
# `as.data.frame(...)$d` only worked through partial matching of the
# auto-generated column name.
nodes_df$d_in <- unname(degree(g, mode = "in"))
nodes_df$d_out <- unname(degree(g, mode = "out"))
# 1 for ids below 144 (plotted as "Managers" later on), 0 otherwise.
nodes_df$isGestor <- ifelse(nodes_df$id < 144, 1, 0)

# Sort by id. From here on the rows no longer follow the vertex order of g.
nodes_df <- nodes_df %>%
  arrange(id)

```

```{r}
# Box plot of the total degree over all nodes.
fig <- plot_ly(x = nodes_df$d, type = "box", name = "") %>%
  layout(
    title = 'Distribución de los grados de los nodos',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
```


```{r}
# Split the node table by the isGestor flag (rows with NA are dropped).
nodes_manager_df <- nodes_df[which(nodes_df$isGestor == 1), ]
nodes_regular_employee_df <- nodes_df[which(nodes_df$isGestor == 0), ]
```

```{r}
# Histogram of total degree for the manager rows.
fig <- plot_ly(
  x = nodes_manager_df$d,
  type = "histogram",
  histfunc = 'sum',
  name = "Managers",
  nbinsx = 25
) %>%
  layout(
    title = 'Distribución de los grados de managers',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
```

```{r}
# Histogram of total degree for the regular-employee rows.
fig <- plot_ly(
  x = nodes_regular_employee_df$d,
  type = "histogram",
  histfunc = 'sum',
  name = "Regular employees",
  nbinsx = 25
) %>%
  layout(
    title = 'Distribución de los grados de empleados regulares',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
```

```{r}
# Mean and maximum total degree, managers vs. regular employees.
# (A stray bare `degree` expression used to follow; it only printed the source
# of the igraph function and has been dropped.)
cat("Grado medio de los nodos managers: \t", mean(nodes_manager_df$d))
cat("\nGrado medio de los nodos regulares: \t", mean(nodes_regular_employee_df$d))

cat("\nGrado máximo de los nodos managers: \t", max(nodes_manager_df$d))
cat("\nGrado máximo de los nodos regulares: \t", max(nodes_regular_employee_df$d))

```

```{r}
# Ten rows of nodes_df with the highest total degree.
top_degrees <- head(arrange(nodes_df, desc(d)), 10)
```

```{r}
# Bar chart of the rows in top_degrees, one bar per node id.
fig <- plot_ly(
  type = "bar",
  x = factor(top_degrees$id),
  y = top_degrees$d,
  text = top_degrees$d,    # value label for each bar
  textposition = "inside", # draw the label inside the bar
  name = "Top 10 nodos con mayor grado"
)
fig
```




### Distribución de grados entrantes y salientes


```{r}
# Ten nodes with the highest in-degree, drawn as a bar chart.
top_degrees_in <- head(arrange(nodes_df, desc(d_in)), 10)

fig <- plot_ly(
  type = "bar",
  x = factor(top_degrees_in$id),
  y = top_degrees_in$d_in,
  text = top_degrees_in$d_in, # value label for each bar
  textposition = "inside",    # draw the label inside the bar
  name = "Top 10 nodos con mayor grado entrante"
)
fig
```

```{r}
# Ten nodes with the highest out-degree. Kept in its own variable so the
# in-degree ranking is not overwritten under a misleading name.
top_degrees_out <- nodes_df %>%
  arrange(desc(d_out)) %>%
  head(10)

fig <- plot_ly(
  x = as.factor(top_degrees_out$id),
  y = top_degrees_out$d_out,
  name = "Top 10 nodos con mayor grado saliente",
  text = top_degrees_out$d_out, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig

```

```{r}
# In-degree vs. out-degree histograms for the manager rows, as grouped bars.
fig <- plot_ly(
  x = nodes_manager_df$d_in,
  type = "histogram",
  histfunc = 'sum',
  name = "Grados entrantes",
  nbinsx = 25
) %>%
  add_trace(
    x = nodes_manager_df$d_out,
    type = "histogram",
    histfunc = 'sum',
    name = "Grados salientes",
    nbinsx = 25
  ) %>%
  layout(
    title = 'Distribución de los grados entrantes/salientes de gestores',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
```


```{r}
# In-degree vs. out-degree histograms for the regular-employee rows.
fig <- plot_ly(
  x = nodes_regular_employee_df$d_in,
  type = "histogram",
  histfunc = 'sum',
  name = "Grados entrantes",
  nbinsx = 25
) %>%
  add_trace(
    x = nodes_regular_employee_df$d_out,
    type = "histogram",
    histfunc = 'sum',
    name = "Grados salientes",
    nbinsx = 25
  ) %>%
  layout(
    title = 'Distribución de los grados entrantes/salientes de empleados regulares',
    plot_bgcolor = "#e5ecf6",
    yaxis = list(type = 'linear'),
    barmode = "group",
    bargap = 0.2
  )

fig
```

### Simplificación para medidas estructurales

Dado que no es computacionalmente factible calcular las medidas estructurales con el grafo de `emails.edges` original, hemos decidido simplificar el problema planteando un subgrafo colapsado para un periodo de tiempo más reducido a partir del fichero `emails-timestamps.edges`. En este caso se ha considerado el año 2001.


### Medidas estructurales sobre el grafo reducido


```{r}
# Load the reduced edge list (header row, whitespace-separated fields).
edges <- read.csv("emails-reduced-december-2000.edges", header = TRUE, sep = "")
# Copy of the edge list without the weight column, flagging endpoints whose id
# is below 144.
edges_with_relation <- edges
edges_with_relation$weight <- NULL
edges_with_relation$fromGestor <- edges_with_relation$from < 144
edges_with_relation$toGestor <- edges_with_relation$to < 144
# Directed graph with loops removed and multi-edges merged (attributes summed).
# simplify() is namespace-qualified because another attached package can mask
# it, which fails with "unused argument (edge.attr.comb = ...)".
g <- graph_from_data_frame(edges, directed = TRUE)
g <- igraph::simplify(g, edge.attr.comb = "sum")
d <- degree(as.undirected(g)) # node degrees, ignoring edge direction
max(d) # maximum degree
min(d) # minimum degree
d[which.max(d)] # node that has the maximum degree
```



```{r}
# Diameter of g, directed and undirected, ignoring edge weights. The argument
# is spelled `weights` in full instead of relying on partial matching.
diameter(g, directed = TRUE, weights = NA)
diameter(g, directed = FALSE, weights = NA)
#distances(as.undirected(g), weights = NA) # --> too large!
mean_distance(as.undirected(g))
#all_shortest_paths(as.undirected(g), from = 2, weights = NA) # --> too large!
```

```{r}
# Graph components: is g weakly / strongly connected?
is_connected(g, mode = "weak")
is_connected(g, mode = "strong")
```

```{r}
# Strongly connected components of g, and how many of them there are.
componentes <- components(g, mode = "strong")

length(componentes$csize)

# Relative frequency of component sizes.
cd <- component_distribution(g)

# Sub-graph induced by the largest weakly connected component. The weak
# components are computed once instead of twice.
weak_components <- components(g, mode = "weak")
cmax <- induced_subgraph(
  g,
  which(weak_components$membership == which.max(weak_components$csize))
)
```
```{r}
# Articulation points of g.
articulation_points(g)
```
```{r}
# Size of the largest strongly connected component stored in `componentes`.
max(componentes[["csize"]])
```

```{r}
# Manager flag as a vertex attribute. It is derived from each vertex's own name
# so it follows the vertex order of the current g; nodes_df is sorted by id and
# was built from the full graph, so its column cannot be assigned positionally.
V(g)$isGestor <- ifelse(as.numeric(V(g)$name) < 144, 1, 0)
```


```{r}
# Structural measures
edge_density(as.undirected(g))
transitivity(g)
reciprocity(g)
assortativity_degree(g)
assortativity_nominal(g, as.factor(V(g)$isGestor))
```

```{r}
# Size of the largest clique in g.
clique_num(g)
# NOTE(review): cliques() enumerates every clique and the result is printed in
# full; confirm this is intended on a graph of this size.
cliques(g)
```

### Medidas de centralidad

```{r}
# Betweenness centrality of every vertex of g.
b <- betweenness(graph = g)
```

```{r}
# Closeness centrality of every vertex of g.
# NOTE(review): the variable shadows base::c as a value; later cells read it.
c <- closeness(graph = g)
```

```{r}
# Eigenvector centrality scores (the `vector` element of the result).
e <- eigen_centrality(g)[["vector"]]
```

```{r}
# PageRank scores, computed undirected and without edge weights.
pr <- page_rank(g, directed = FALSE, weights = NA)[["vector"]]
```

```{r}
# Attach the centrality scores to nodes_df. The vectors follow the vertex order
# of g, whereas nodes_df is sorted by id and may cover a different vertex set,
# so rows are matched by id; ids absent from g get NA.
vertex_pos <- match(nodes_df$id, as.numeric(V(g)$name))
nodes_df$b <- unname(b)[vertex_pos]
nodes_df$c <- unname(c)[vertex_pos]
nodes_df$e <- unname(e)[vertex_pos]
nodes_df$pr <- unname(pr)[vertex_pos]
```

```{r}
# Top 5 nodes by betweenness. The trace name used to be the copy-pasted
# "Top 10 nodos con mayor grado"; it now names what the chart shows.
top_betweenness <- nodes_df %>%
  arrange(desc(b)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_betweenness$id),
  y = top_betweenness$b,
  name = "Top 5 nodos con mayor centralidad de intermediario",
  text = top_betweenness$b, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de intermediario', plot_bgcolor = "#e5ecf6")

fig
```



```{r}
# Top 5 nodes by closeness. The trace name used to be the copy-pasted
# "Top 10 nodos con mayor grado"; it now names what the chart shows.
top_closenness <- nodes_df %>%
  arrange(desc(c)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_closenness$id),
  y = top_closenness$c,
  name = "Top 5 nodos con mayor centralidad de cercanía",
  text = top_closenness$c, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de cercanía', plot_bgcolor = "#e5ecf6")

fig
```

```{r}
# Top 5 nodes by eigenvector centrality. The trace name used to be the
# copy-pasted "Top 10 nodos con mayor grado"; it now names what the chart shows.
top_eigenvector <- nodes_df %>%
  arrange(desc(e)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_eigenvector$id),
  y = top_eigenvector$e,
  name = "Top 5 nodos con mayor centralidad de autovector",
  text = top_eigenvector$e, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de autovector', plot_bgcolor = "#e5ecf6")

fig
```


```{r}
# Top 5 nodes by PageRank. The trace name used to be the copy-pasted
# "Top 10 nodos con mayor grado"; it now names what the chart shows.
top_pagerank <- nodes_df %>%
  arrange(desc(pr)) %>%
  head(5)

fig <- plot_ly(
  x = as.factor(top_pagerank$id),
  y = top_pagerank$pr,
  name = "Top 5 nodos con mayor centralidad de PageRank",
  text = top_pagerank$pr, # value label for each bar
  textposition = "inside", # draw the label inside the bar
  type = "bar"
)
fig <- fig %>%
  layout(title = 'Top 5 centralidades de PageRank', plot_bgcolor = "#e5ecf6")

fig
```


### Comunidades

```{r}
# Communities - Girvan-Newman (edge betweenness), on the undirected graph.
# NOTE(review): igraph warns that modularity treats edge weights as
# similarities while edge betweenness treats them as distances; confirm that
# running this on the weighted graph is intended.
ebc <- cluster_edge_betweenness(as.undirected(g))
```

```{r}
# Draw the edge-betweenness communities on the undirected graph, then the
# dendrogram of the same result.
plot(x = ebc, y = as.undirected(g))
plot_dendrogram(ebc)
```


```{r}
# Communities - spectral partitioning (leading eigenvector)
lec <- cluster_leading_eigen(as.undirected(g))
plot(lec, as.undirected(g))
```

```{r}
# Communities - Louvain, drawn on the undirected graph.
plot(
  x = cluster_louvain(graph = as.undirected(g)),
  y = as.undirected(g)
)
```

```{r}
# Communities - Leiden with the modularity objective, drawn on the undirected
# graph.
plot(
  x = cluster_leiden(graph = as.undirected(g), objective_function = "modularity"),
  y = as.undirected(g)
)
```




